library(memisc)
library(methylclock)
library(Biobase)
library(glmnet)
library(ppcor)
library(forestplot)
library(rstatix)

#Prepare covariate and outcome database#################################################################

dbhelix<-read.csv("oliverap77_78_20210716.csv", header=T)

dbhelix$h_ethnicity_3catc<-NA
dbhelix$h_ethnicity_3catc[dbhelix$h_ethnicity_c== "Caucasian"]<-"WhiteEur_WhiteOther"
dbhelix$h_ethnicity_3catc[dbhelix$h_ethnicity_c== "Pakistani" | dbhelix$h_ethnicity_c== "Asian"]<-"Pakistani_Asian"
dbhelix$h_ethnicity_3catc[dbhelix$h_ethnicity_c== "Other" | dbhelix$h_ethnicity_c== "African" | dbhelix$h_ethnicity_c== "Native_American"]<-"Other"

dbhelix$h_sex<-as.factor(dbhelix$h_sex)
dbhelix$hs_globalexp2<-as.factor(dbhelix$hs_globalexp2)
dbhelix$h_cohort<-as.factor(dbhelix$h_cohort)

dbhelix$hs_globalsmok_m_2<-ifelse(dbhelix$hs_globalsmok_m_None=="smoker",1,0)
dbhelix$hs_globalsmok_m_2<-as.factor(dbhelix$hs_globalsmok_m_2)
dbhelix$bw_kg<-dbhelix$e3_bw/1000

##physical activity#########
hist(dbhelix$hs_mvpa)
vTert = quantile(dbhelix$hs_mvpa, c(0:3/3), na.rm = T)
dbhelix$hs_PA.cat = cut(dbhelix$hs_mvpa,  vTert,  include.lowest = T,  labels = c("Low", "Medium", "High"))
dbhelix$hs_PA.cat<-as.factor(dbhelix$hs_PA.cat)
table(dbhelix$hs_PA.cat)

##
dbhelix$FAS_cat<-as.factor(dbhelix$FAS_cat)
dbhelix$h_edumc<-as.factor(dbhelix$h_edumc)

##social capital#########
dbhelix$hs_enjoy_area[dbhelix$hs_enjoy_area==3]<-NA
dbhelix$sc1<-recode(dbhelix$hs_enjoy_area, "2 = 1;5 = 2; 6 = 3;4 = 4; 1 = 5")
dbhelix$hs_help_neigh[dbhelix$hs_help_neigh==3]<-NA
dbhelix$sc2<-recode(dbhelix$hs_help_neigh, "2 = 1;5 = 2; 6 = 3;4 = 4; 1 = 5")
dbhelix$hs_trust_neigh[dbhelix$hs_trust_neigh==3]<-NA
dbhelix$sc3<-recode(dbhelix$hs_trust_neigh, "2 = 1;5 = 2; 6 = 3;4 = 4; 1 = 5")
dbhelix$hs_ask_advice[dbhelix$hs_ask_advice==3]<-NA
dbhelix$sc4<-recode(dbhelix$hs_ask_advice, "2 = 1;5 = 2; 6 = 3;4 = 4; 1 = 5")
table(dbhelix$contactfam_3cat_num)
dbhelix$contactfam_3cat_num_rev<-recode(dbhelix$contactfam_3cat_num, "3 = 1;2 = 2; 1 = 3")
table(dbhelix$contactfam_3cat_num_rev)
dbhelix$contactfam_3cat_num_rev<-as.factor(dbhelix$contactfam_3cat_num_rev)

dbhelix$SCS<-dbhelix$sc1+dbhelix$sc2+dbhelix$sc3+dbhelix$sc4+dbhelix$participation_3cat+dbhelix$contactfam_3cat_num_rev
hist(dbhelix$SCS)
vTert = quantile(dbhelix$SCS, c(0:3/3), na.rm = T)
dbhelix$SCS.cat = cut(dbhelix$SCS,  vTert,  include.lowest = T,  labels = c("Low", "Medium", "High"))
dbhelix$SCS.cat<-as.factor(dbhelix$SCS.cat)
table(dbhelix$SCS.cat)

##PDS coding#####
table(dbhelix$hs_start_growc)
dbhelix$PDSQ1<-NA
dbhelix$PDSQ1[dbhelix$hs_start_growc=="No "]<-1
dbhelix$PDSQ1[dbhelix$hs_start_growc=="Yes (Barely) "]<-2
dbhelix$PDSQ1[dbhelix$hs_start_growc=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ1)                                          

table(dbhelix$hs_start_bodyhairc)
dbhelix$PDSQ2<-NA
dbhelix$PDSQ2[dbhelix$hs_start_bodyhairc=="No "]<-1
dbhelix$PDSQ2[dbhelix$hs_start_bodyhairc=="Yes (Barely) "]<-2
dbhelix$PDSQ2[dbhelix$hs_start_bodyhairc=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ2) 

table(dbhelix$hs_skin_pimplesc)
dbhelix$PDSQ3<-NA
dbhelix$PDSQ3[dbhelix$hs_skin_pimplesc=="No "]<-1
dbhelix$PDSQ3[dbhelix$hs_skin_pimplesc=="Yes (Barely) "]<-2
dbhelix$PDSQ3[dbhelix$hs_skin_pimplesc=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ3) 

table(dbhelix$hs_voice_cboy)
dbhelix$PDSQ4<-NA
dbhelix$PDSQ4[dbhelix$hs_voice_cboy=="No "]<-1
dbhelix$PDSQ4[dbhelix$hs_voice_cboy=="Yes (Barely) "]<-2
dbhelix$PDSQ4[dbhelix$hs_voice_cboy=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ4) 

table(dbhelix$hs_hair_cboy)
dbhelix$PDSQ5<-NA
dbhelix$PDSQ5[dbhelix$hs_hair_cboy=="No "]<-1
dbhelix$PDSQ5[dbhelix$hs_hair_cboy=="Yes (Barely) "]<-2
dbhelix$PDSQ5[dbhelix$hs_hair_cboy=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ5)

table(dbhelix$hs_breast_cgirl)
dbhelix$PDSQ6<-NA
dbhelix$PDSQ6[dbhelix$hs_breast_cgirl=="No "]<-1
dbhelix$PDSQ6[dbhelix$hs_breast_cgirl=="Yes (Barely) "]<-2
dbhelix$PDSQ6[dbhelix$hs_breast_cgirl=="Yes (Definitely) "]<-3
table(dbhelix$PDSQ6)

table(dbhelix$hs_menses_cgirl)
dbhelix$PDSQ7<-NA
dbhelix$PDSQ7[dbhelix$hs_menses_cgirl=="No "]<-1
dbhelix$PDSQ7[dbhelix$hs_menses_cgirl=="Yes (Definitely) "]<-4
table(dbhelix$PDSQ7)

dbhelix$PDS.b<-(dbhelix$PDSQ5+dbhelix$PDSQ4+dbhelix$PDSQ3+dbhelix$PDSQ2+dbhelix$PDSQ1)/5
dbhelix$PDS.g<-(dbhelix$PDSQ7+dbhelix$PDSQ6+dbhelix$PDSQ3+dbhelix$PDSQ2+dbhelix$PDSQ1)/5
dbhelix$PDS[dbhelix$h_sex==1]<-dbhelix$PDS.b[dbhelix$h_sex==1]
dbhelix$PDS[dbhelix$h_sex==2]<-dbhelix$PDS.g[dbhelix$h_sex==2]
dbhelix$PD<-ifelse(dbhelix$PDS>1,1,0)

tapply(dbhelix$PD, dbhelix$h_cohort, table)
dbhelix$PD=as.factor(dbhelix$PD)

##Import spirometry and process data####
spiro <- read.csv("Spiro_PP_2017-23-02.csv")
spiro2 <- spiro[spiro$Acceptability_FEV1 == 1 & spiro$Reprod_stringent_FEV1 == 1 & spiro$Panel == "sub" &
                  spiro$FEV1_PP >= 60 & spiro$FEV1_PP <= 140, ]
spiro2 <- by(spiro2[,"FEV1_PP"],spiro2$HelixID,max)
spiro2 <- data.frame(cbind(FEV1_PP = as.numeric(spiro2), HelixID = names(spiro2)))
spiro2$FEV1_PP <- as.numeric(as.character(spiro2$FEV1_PP))

spiro3 <- merge(x = spiro2, y = spiro, by = c("HelixID", "FEV1_PP"), all.x = T)
spiro3 <- spiro3[spiro3$HelixID %in% dbhelix$HelixID, ]

# For 6 individuals, 2 curves corresponds to max(FEV1); for them, we chose the curve with the highest FVC
spiro4 <- by(spiro3[, "FVC_PP"], spiro3$HelixID, max)
spiro4 <- data.frame(cbind(FVC_PP = as.numeric(spiro4), HelixID = names(spiro4)))
spiro4$FVC_PP <- as.numeric(as.character(spiro4$FVC_PP))

Helix_spiro <- merge(x = spiro4, y = spiro3, by = c("HelixID", "FVC_PP"), all.x = T)
table(Helix_spiro$Cohort)
hist(Helix_spiro$FEV1_PP)

dbhelix<-merge(dbhelix,Helix_spiro[c("HelixID", "FVC_PP", "FEV1_PP")], all.x = T, by="HelixID")
summary(dbhelix$FEV1_PP)

##transform and scale outcome distributions#####
dbhelix$hs_Gen_Int.log<-log(dbhelix$hs_Gen_Int+1)
dbhelix$hs_Gen_Tot.log<-log(dbhelix$hs_Gen_Tot+1)

toscale<- c("hs_fatprop_bia",
            "hs_dcolors3"  , "hs_hitrtse" ,"hs_correct_raven"  ,    "hs_Gen_Int.log"  ,
            "hs_Gen_Ext.log" , "FEV1_PP" )

dbscale<-dbhelix[,toscale]
dbscale<-scale(dbscale)
dbscale<-as.data.frame(dbscale)
names(dbscale)<-paste0(names(dbscale),".sc")
dbhelix<-cbind(dbhelix,dbscale)
remove(dbscale)
names(dbhelix)


#prepare biological age markers############################################################################
##Telomere Length######
dbhelix$TL.rev<-dbhelix$TL * -1 #reverse for comparison with age acceleration markers
dbhelix$TL.rev.sc<-scale(dbhelix$TL.rev)

###plot fig1 telomere length age correlation##########
plot( dbhelix$age_sample_years,dbhelix$TL,ylab = "Biological Age", xlab ="Age, yrs" , pch=19, main="Telomere Length, R = -.07, p = 0.02",cex.axis = 1.5,cex.main = 2.5 ,cex.lab = 2)

##DNA methylation clock#########
load("methylome_subcohort_ComBatSlide_6cells_notfitr_v4.Rdata")
dim(getBeta(methylome_subcohort_ComBatSlide_notfitr))
pheno_helix <- pData(methylome_subcohort_ComBatSlide_notfitr) #obtain pheno data

helix_epigenetic_clocks <- DNAmAge(methylome_subcohort_ComBatSlide_notfitr, age=pheno_helix$age_sample_years, clocks = "all") 
pheno_helix<-as.data.frame(pheno_helix)
helix_epigenetic_clocks<-as.data.frame(helix_epigenetic_clocks)
helix_epigenetic_clocks2<-cbind(helix_epigenetic_clocks,pheno_helix)

###Add to db and calculate delta age####

dbhelix=merge(dbhelix,helix_epigenetic_clocks2[c("skinHorvath" ,
                                  "HelixID","NK" ,"Bcell" , "CD4T", 
                                  "CD8T" , "Eos" , "Mono" ,"Neu" )], all.x=T, by="HelixID")

dbhelix$ageAcc.skinHorvath<-dbhelix$skinHorvath-dbhelix$hs_age_years

remove(methylome_subcohort_ComBatSlide_notfitr,pheno_helix, helix_epigenetic_clocks,helix_epigenetic_clocks2  )

###plot fig1 DNA methylation age correlation##########
plot( dbhelix$age_sample_years,dbhelix$skinHorvath,ylab = "Biological Age", xlab ="Age, yrs" , pch=19, main="DNA methylation age, R = .85",cex.axis = 1.5,cex.main = 2.5 ,cex.lab = 2)

##Transcriptome age####
###load data####

load("transcriptome_subcohort_f1_combat2_v3.RData")##Helix subcohort data

names(transcriptome_subcohort_f1_combat2@phenoData@data)
featuredata<-transcriptome_subcohort_f1_combat2@featureData@data

db<-as.data.frame((transcriptome_subcohort_f1_combat2@phenoData@data))
table(transcriptome_subcohort_f1_combat2$Period)
mrnas<-transcriptome_subcohort_f1_combat2@assayData$exprs
mrnas<-as.data.frame(t(mrnas))

remove(transcriptome_subcohort_f1_combat2)

load("transcriptome_panel_f1_combat2_v3.RData")##HELIX panel data

names(transcriptome_panel_f1_combat2@phenoData@data)
db.p2<-as.data.frame((transcriptome_panel_f1_combat2@phenoData@data))
mrnas.p2<-transcriptome_panel_f1_combat2@assayData$exprs
mrnas.p2<-as.data.frame(t(mrnas.p2))
length(db.p2$HelixID[db.p2$Period=="1B"])
mrnas.p2<-mrnas.p2[db.p2$Period=="1B",]##take panel study 2 data
rownames(mrnas.p2)<-db.p2$HelixID[db.p2$Period=="1B"]
db.p2<-db.p2[db.p2$Period=="1B",]

remove(transcriptome_panel_f1_combat2)

###scale and split into train test set######
omics<-rbind(mrnas,mrnas.p2)
omics<-scale(omics)
omics<-as.data.frame(omics)

mrnas.sc<-omics[1:1157,]
mrnas.p2.sc<-omics[1158:1285,]
remove(omics)

###train model#######
foldid <- sample(1:10, size = length( db$hs_age_years), replace = TRUE)

set.seed(1)

alphas<-seq(0,1,by =0.1)
alpharesults<-matrix(NA, length(alphas), 3)
alpharesults[,1]<-alphas
i=1
for(i in 1:length(alphas)){
  print(alphas[i])
  omics.cv = cv.glmnet(data.matrix(mrnas.sc),  db$age_sample_years,keep=T, foldsid=foldid, alpha=alphas[i],family="gaussian")
  
  alpharesults[i,2]<-omics.cv$lambda.min
  alpharesults[i,3]<-omics.cv$cvm[which(omics.cv$lambda==omics.cv$lambda.min)]
  
}
alpharesults<-as.data.frame(alpharesults)
names(alpharesults)<-c("alpha", "lambda", "cvmse")
alpharesults$alpha[which(alpharesults$cvmse==min(alpharesults$cvmse))]
foundalpha<-alpharesults$alpha[which(alpharesults$cvmse==min(alpharesults$cvmse))]

omics.cv = cv.glmnet(data.matrix(mrnas.sc),  db$age_sample_years,keep=T, alpha=foundalpha,family="gaussian")

validation<-assess.glmnet(omics.cv$fit.preval, newy=db$age_sample_years, family = "gaussian")
db$cv.mRNAage<-as.numeric(omics.cv$fit.preval[,which(omics.cv$lambda==omics.cv$lambda.min)])

glmnet.omics.c = glmnet(as.matrix(mrnas.sc), db$age_sample_years, family="gaussian", alpha=foundalpha, nlambda=100)
db$mRNAage<-predict(glmnet.omics.c,as.matrix(mrnas.sc),type="response",omics.cv$lambda.min)

###examine performance in HELIX subcohort######
cor.test(db$age_sample_years,db$mRNAage)
mean(abs(db$age_sample_years-db$mRNAage))
cor.test(db$age_sample_years,db$cv.mRNAage)

### plot fig 1 transcriptome age correlation plot####
plot( db$age_sample_years,db$mRNAage,ylab = "Biological Age", xlab ="Age, yrs" , pch=19, main="Transcriptome Age, R = .94",cex.axis = 1.5,cex.main = 2.5 ,cex.lab = 2)


###test in panel#######
db.p2$mRNAage<-predict(glmnet.omics.c,as.matrix(mrnas.p2.sc),type="response",omics.cv$lambda.min)

cor.test(db.p2$age_sample_years,db.p2$mRNAage)
mean(abs(db.p2$age_sample_years-db.p2$mRNAage))

###fig 1 validation plot######
plot( db.p2$age_sample_years,db.p2$mRNAage, pch=19,ylab="Biological Age", xlab ="Age, yrs", main="Transcriptome Age , R = .93",cex.axis = 1.5,cex.main = 2 ,cex.lab = 1.5)

###figure s1#####
db.a<-db[,names(db.p2)]
db.a$visit<-1
db.b<-db.p2
db.b$visit<-2
db.long<-rbind(db.a,db.b)
remove(db.a,db.b)

panelIDs<-intersect(db$HelixID,db.p2$HelixID )


boxplot(db.long$mRNAage[db.long$HelixID %in% panelIDs]~ db.long$visit[db.long$HelixID %in% panelIDs], xlab= "Panel Clinic Visit", ylab = "Transcriptome age", main="Transcriptome age",cex.lab = 1.5,cex.axis = 1.5,cex.main = 2)
t.test(db.long$mRNAage[db.long$HelixID %in% panelIDs]~ db.long$visit[db.long$HelixID %in% panelIDs], alternative = "less",  paired = T)

###extract model coefficients for table S2#####
mrna.coefs<-as.matrix(coef(omics.cv, s = "lambda.min"))
featuredata$modelcoeffs<-mrna.coefs[-1,1]

### hypergeometric test against Peters et al#########
Peters<-read.csv("Peters2015AdultmRAResults.csv",header=T)
Peters<-Peters[1:1497,]
Peters$EntrezeGeneID_Affy<-Peters$NEW.Entrez.ID
peterslist<-Peters$EntrezeGeneID_Affy
peterslist<-peterslist[which(peterslist %in% intersect(featuredata$EntrezeGeneID_Affy, Peters$NEW.Entrez.ID))]

featuredata<-merge(featuredata, Peters[, 9:11], all.x = T, by= "EntrezeGeneID_Affy")

selectedlist<-featuredata$EntrezeGeneID_Affy[featuredata$modelcoeffs !=0]
nonselectedlist<-featuredata$EntrezeGeneID_Affy[featuredata$modelcoeffs ==0]

petersinselected<-intersect(peterslist, selectedlist)
petersinnotselected<-intersect(peterslist, nonselectedlist)

hitInSample<-length(petersinselected)
hitInPop<-length(petersinnotselected)
failInPop<-nrow(featuredata)-hitInPop
sampleSize <-length(peterslist)

phyper(hitInSample-1, hitInPop, failInPop, sampleSize, lower.tail= FALSE)

#### write table S2 with annotation information#####
exportcoefs<-featuredata[featuredata$modelcoeffs!=0,]
write.csv(exportcoefs, file="TA_coefs_full.csv", row.names = F)

###Add to db and calculate delta age####
dbhelix<-merge(dbhelix, db["mrnaage", "HelixID"], all.x = T, by= "HelixID")
dbhelix$ageAcc.mRNA<-dbhelix$mRNAage-dbhelix$hs_age_years

##Immunometabolic clock###########
###load data#####
load("metab_serum_subcohort_v3.Rdata")
smets<-metab_serum_subcohort@assayData$exprs
smets<-as.data.frame(t(smets))
rownames(smets)<-metab_serum_subcohort$HelixID
smets<-as.data.frame(smets)
names(smets)<-metab_serum_subcohort@featureData@data$Rvar

load("metab_urine_subcohort_v3.Rdata")
umets<-metab_urine_subcohort@assayData$exprs
umets<-as.data.frame(t(umets))
rownames(umets)<-metab_urine_subcohort$HelixID


load("proteome_subcohort_v5.Rdata")
sprots<-proteome_subcohort@assayData$exprs
sprots<-as.data.frame(t(sprots))
rownames(sprots)<-proteome_subcohort$HelixID

remove(proteome_subcohort,metab_urine_subcohort,metab_serum_subcohort )

omics.c<-merge(umets, smets, all.x=T, by = 0)
rownames(omics.c)<-omics.c[,1]
omics.c$Row.names<-NULL
omics.c<-merge(omics.c, sprots, all.x=T, by = 0)
rownames(omics.c)<-omics.c[,1]
omics.c$Row.names<-NULL
omics.c<-omics.c[which(complete.cases(omics.c)==T),]


load("metab_serum_panel_v3.Rdata")
smets2<-metab_serum_panel@assayData$exprs
smets2<-as.data.frame(t(smets2))
smets2<-smets2[metab_serum_panel$Period=="1B",]
rownames(smets2)<-metab_serum_panel$HelixID[metab_serum_panel$Period=="1B"]
names(smets2)<-metab_serum_panel@featureData@data$Rvar

load("metab_urine_panel_v3.Rdata")
umets2<-metab_urine_panel@assayData$exprs
umets2<-as.data.frame(t(umets2))
umets2<-umets2[metab_urine_panel$Period=="1B",]
rownames(umets2)<-metab_urine_panel$HelixID[metab_urine_panel$Period=="1B"]
names(umets2)<-metab_urine_panel@featureData@data$Rvar

load("proteome_panel_v5.Rdata")
sprots2<-proteome_panel@assayData$exprs
sprots2<-as.data.frame(t(sprots2))
sprots2<-sprots2[proteome_panel$Period=="1B",]
rownames(sprots2)<-proteome_panel$HelixID[proteome_panel$Period=="1B"]

omics.c2<-merge(umets2, smets2, all.x=T, by = 0)
rownames(omics.c2)<-omics.c2[,1]
omics.c2$Row.names<-NULL
omics.c2<-merge(omics.c2, sprots2, all.x=T, by = 0)
rownames(omics.c2)<-omics.c2[,1]
omics.c2$Row.names<-NULL
omics.c2<-omics.c2[which(complete.cases(omics.c2)==T),]

db.om2<-metab_serum_panel@phenoData@data
db.om2<-db.om2[db.om2$Period=="1B",]
db.om2<-db.om2[db.om2$HelixID %in% rownames(omics.c2),]
omics.c2<- omics.c2[match(db.om2$HelixID, rownames(omics.c2)), ]

summary(db.om2$age_sample_years)
db.om2$h_cohort2<-as.character(db.om2$h_cohort)
db.om2$h_cohort2[db.om2$h_cohort=="SAB"]<-"INMA"
db.om2$h_cohort<-as.factor(db.om2$h_cohort2)
db.om2$h_cohort2<-NULL

db.om2$h_sex<-ifelse(db.om2$e3_sex=="male",1,2)

###scale and split into train test set####
omics.long<-rbind(omics.c,omics.c2)
omics.long.sc<-scale(omics.long)
omics<-omics.long.sc[1:1152,]
omics2.sc<-omics.long.sc[1153:1305,]

###train model#####
foldid <- sample(1:10, size = length( db.om$hs_age_years), replace = TRUE)

set.seed(1)

alphas<-seq(0,1,by =0.05)
alpharesults<-matrix(NA, length(alphas), 4)
alpharesults[,1]<-alphas
i=1
for(i in 1:length(alphas)){
  print(alphas[i])
  omics.cv = cv.glmnet(data.matrix(omics),  db.om$hs_age_years,keep=T, foldsid=foldid,alpha=alphas[i],family="gaussian")#, penalty.factor=pen.fac)
  
  alpharesults[i,2]<-omics.cv$lambda.min
  alpharesults[i,3]<-omics.cv$cvm[which(omics.cv$lambda==omics.cv$lambda.min)]
  cvmage.i<-as.numeric(omics.cv$fit.preval[,which(omics.cv$lambda==omics.cv$lambda.min)])
  pc<-pcor.test(db.om$hs_age_years,cvmage.i,db.om[,c("h_sex",cohorts)])
  alpharesults[i,4]<-pc$estimate
}
alpharesults<-as.data.frame(alpharesults)
names(alpharesults)<-c("alpha", "lambda", "cvmse", "cv.pcor")
plot(alpharesults$cvmse, alpharesults$cv.pcor, pch=19)
text(alpharesults$cvmse, alpharesults$cv.pcor,labels=alphas, cex=0.7, pos=3)
alpharesults$alpha[which(alpharesults$cvmse==min(alpharesults$cvmse))]
foundalpha<-alpharesults$alpha[which(alpharesults$cv.pcor==max(alpharesults$cv.pcor))]

##final model##
omics.cv = cv.glmnet(data.matrix(omics),  db.om$hs_age_years,keep=T, foldsid=foldid,alpha=foundalpha,family="gaussian")

validation<-assess.glmnet(omics.cv$fit.preval, newy=db.om$hs_age_years, family = "gaussian")
validation$mse[which(omics.cv$lambda==omics.cv$lambda.min)]
db.om$cvmage.unadj<-as.numeric(omics.cv$fit.preval[,which(omics.cv$lambda==omics.cv$lambda.min)])

glmnet.omics.c = glmnet(as.matrix(omics), db.om$hs_age_years, family="gaussian", alpha=foundalpha, nlambda=100)
db.om$predage.unadj<-predict(glmnet.omics.c,as.matrix(omics),type="response",omics.cv$lambda.min)

###examine preformance in HELIX subcohort#####

cor.test(db.om$age_sample_years,db.om$predage.unadj)
mean(abs(db.om$age_sample_years-db.om$predage.unadj))
cor.test(db.om$age_sample_years,db.om$cvmage.unadj)

###plot fig1 Immunometabolic age correlation##########
plot( db$age_sample_years,db$mRNAage,ylab = "Biological Age", xlab ="Age, yrs" , pch=19, main="Immunometabolic Age, R = .86",cex.axis = 1.5,cex.main = 2.5 ,cex.lab = 2)

###test in panel######
db.om2$predage.unadj<-predict(glmnet.omics.c,as.matrix(omics2.sc),type="response",omics.cv$lambda.min)

cor.test(db.om2$age_sample_years,db.om2$predage.unadj)
mean(abs(db.om2$age_sample_years-db.om2$predage.unadj))

###fig 1 validation plot######
plot( db.om2$age_sample_years,db.om2$predage.unadj, pch=19,ylab="Biological Age", xlab ="Age, yrs", main="Immunometabolic Age , R = .84",cex.axis = 1.5,cex.main = 2 ,cex.lab = 1.5)

###figure s1#####

db.a<-db.om[,c("predage.unadj", "age_sample_years")]
db.a$visit<-1
db.b<-db.om2[,c("predage.unadj", "age_sample_years")]
db.b$visit<-2
db.long<-rbind(db.a,db.b)
remove(db.a,db.b)

panelIDs<-intersect(db.om$HelixID,db.om2$HelixID )
boxplot(db.long$predage.unadj[db.long$HelixID %in% panelIDs]~ db.long$visit[db.long$HelixID %in% panelIDs], xlab= "Panel Clinic Visit", ylab = "Immunometabolic Age", main="Immunometabolic Age",cex.lab = 1.5,cex.axis = 1.5,cex.main = 2)
t.test(db.long$predage.unadj[db.long$HelixID %in% panelIDs]~ db.long$visit[db.long$HelixID %in% panelIDs], alternative = "less",  paired = T)

###Add to db and calculate delta age####
dbhelix<-merge(dbhelix, db.om["predage.unadj", "HelixID"], all.x = T, by= "HelixID")
dbhelix$ageAcc.IM<-dbhelix$predage.unadj-dbhelix$hs_age_years

###extract model coefficients for table S1#####
exporttable<-as.matrix(coef(omics.cv, s = "lambda.min"))
write.csv(exporttable, file="IMage_coefs.csv", row.names = T)

##Figure s2 and s3 Correlation by cohort#######

#exposure = "mRNAage"
exposure="predage.unadj"
pdf(file=paste(exposure, "age_performace_bycohort.pdf"))
par(mfrow = c(2,3))
for(i in 1:6){
  corresult<-cor.test(dbhelix$hs_age_years[dbhelix$h_cohort ==cohorts[i]],dbhelix[dbhelix$h_cohort ==cohorts[i],exposure])
  mae = mean(abs(dbhelix$hs_age_years[dbhelix$h_cohort ==cohorts[i]]-dbhelix[dbhelix$h_cohort ==cohorts[i],exposure]), na.rm=T)   
  plot(dbhelix$hs_age_years[dbhelix$h_cohort ==cohorts[i]],dbhelix[dbhelix$h_cohort ==cohorts[i],exposure], main = cohorts[i], xlab = "Chronological age, yrs", ylab = paste0(Titles[j],"%"))
  legend("topright",legend=c(paste0("MAE= ",round(mae, digits = 2)," r= ",round(corresult$estimate, digits = 2), " p= ",signif(corresult$p.value, digits = 2))),  bty="n", cex=0.8)  
}
dev.off()

#Statistical analysis###########################################
exposures=c("TL.rev.sc", "ageAcc.skinHorvath", "ageAcc.mRNA", "ageAcc.IM" )

cat2<-c( "h_sex", "hs_globalexp2", "hs_globalsmok_m_2")
cat3<-c( "h_ethnicity_3catc",    "h_edumc",  "FAS_cat","SCS.cat" ,  "hs_PA.cat")
contvar<-c("hs_KIDMED", "bw_kg","e3_gac" )

cohorts=c( "BIB", "EDEN", "INMA", "KANC", "MOBA", "RHEA")

outcome1= c( "hs_zheight", "hs_zbmi_theano", "hs_fatprop_bia.sc",
             "hs_dcolors3.sc"  , "hs_hitrtse.sc" ,"hs_correct_raven.sc"  ,    "hs_Gen_Int.log.sc"  ,
             "hs_Gen_Ext.log.sc" , "FEV1_PP.sc" )
outcome3= c("PD" )

##missings table#######
exposures_NAtables = list()

for(m in exposures){
  db.s=dbhelix[which(!is.na(dbhelix[m])),c(m,"h_cohort",cat2,cat3,contvar,outcome1,outcome3,"NK","Bcell","CD4T","CD8T","Mono","Eos", "Neu")]
  cohortN<-as.vector(table(db.s$h_cohort))
  
  i=1
  j=1
  Natable<-matrix(NA,ncol(db.s),8)
  Natable[,1]<-names(db.s)
  for(i in 1:ncol(db.s)){
    Natable[i,2]<-round(sum(is.na(db.s[,i]))/nrow(db.s),digits=2)
    for (j in 1:length(cohorts)){
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
      Natable[i,j+2]<-round(sum(is.na(db.s[db.s$h_cohort==cohorts[j],i]))/cohortN[j],digits=2)
    }
  }
  Natable<-as.data.frame(Natable)
  names(Natable)<-c("Variable",paste(m,"Total", nrow(db.s), sep="_"),paste(m,cohorts, cohortN, sep="_"))
  
  exposures_NAtables[[m]]<-Natable
  
}

###write table S6####
exporttable<-cbind(exposures_NAtables[[ exposures[1]]],exposures_NAtables[[ exposures[2] ]],
            exposures_NAtables[[ exposures[3]]],exposures_NAtables[[ exposures[4] ]])
exporttable<-exporttable[,c(1:2,10,18,26)]
write.csv(exporttable, file= "Missings%.csv", row.names = F)
remove(exporttable)

##figure 2 partial correlation heatmap####
dbhelix$BIB<-ifelse(dbhelix$h_cohort=="BIB",1,0)
dbhelix$INMA<-ifelse(dbhelix$h_cohort=="INMA",1,0)
dbhelix$EDEN<-ifelse(dbhelix$h_cohort=="EDEN",1,0)
dbhelix$MOBA<-ifelse(dbhelix$h_cohort=="MOBA",1,0)
dbhelix$RHEA<-ifelse(dbhelix$h_cohort=="RHEA",1,0)
dbhelix$KANC<-ifelse(dbhelix$h_cohort=="KANC",1,0)

clockcor<-corrPart(dbhelix, c("TL", "skinHorvath","mRNAage","predage.unadj"), c(cohorts,"hs_age_years" ),  n  = T)
clockcor$matrix

##health risk factors analysis###########
covs<-c("h_cohort", "h_sex","h_ethnicity_3catc", "hs_age_years" )
#covs<-covs4

exposures_results_mod0 = list()

for(m in exposures){
  
  table1<-matrix(NA,(length(cat2)*2)+(length(cat3)*3)+ length(contvar) + 6, 7)###
  
  dbtable<-dbhelix[which(!is.na(dbhelix[m])),cat2]
  i=1
  j= seq(1, (length(cat2)*2), by=2)
  for(i in 1:length(cat2))
  {
    table1[j[i],1]<-paste(names(dbtable)[i],levels(dbtable[,i])[1], sep="-")
    table1[j[i]+1,1]<-paste(names(dbtable)[i],levels(dbtable[,i])[2], sep="-")
    
    table1[j[i],2]<-paste(length(dbtable[dbtable[,i]==levels(dbtable[,i])[1] & !is.na(dbtable[,i]),i]),
                          " (", 
                          round(length(dbtable[dbtable[,i]==levels(dbtable[,i])[1] & !is.na(dbtable[,i]),i]) /length(dbtable[!is.na(dbtable[,i]),i])*100,digits=1),
                          ")" )
    
    table1[j[i]+1,2]<-paste(length(dbtable[dbtable[,i]==levels(dbtable[,i])[2] & !is.na(dbtable[,i]) ,i]),
                            " (", 
                            round(length(dbtable[dbtable[,i]==levels(dbtable[,i])[2] & !is.na(dbtable[,i]),i]) /length(dbtable[ !is.na(dbtable[,i]),i])*100,digits=1),
                            ")" )
    
    form<-as.formula(paste0(m, "~", cat2[i], "+", paste(covs, collapse = "+")))
    lmloop<-lm(form, data=dbhelix)
    
    table1[j[i],3]<-"-"
    table1[j[i]+1,3]<-paste(round(coef(lmloop)[2],digits=2),
                            "(", 
                            round(confint(lmloop, level = 0.95)[2,1],digits=2),
                            ",",
                            round(confint(lmloop, level = 0.95)[2,2],digits=2), ")")
    
    table1[j[i]+1,4]<-coef(lmloop)[2]
    table1[j[i]+1,5]<-confint(lmloop, level = 0.95)[2,1]
    table1[j[i]+1,6]<-confint(lmloop, level = 0.95)[2,2]
    
    table1[j[i],7]<-"-"
    table1[j[i]+1,7]<-signif(summary(lmloop)$coefficients[2,4],digits=2)
    
  }
  
  
  dbtable<-dbhelix[which(!is.na(dbhelix[m])),cat3]
  i=1
  j = seq((length(cat2)*2)+1, ((length(cat2)*2)+(length(cat3))*3), by=3)
  
  for(i in 1:length(cat3))
  {
    table1[j[i],1]<-paste(names(dbtable)[i],levels(dbtable[,i])[1], sep="-")
    table1[j[i]+1,1]<-paste(names(dbtable)[i],levels(dbtable[,i])[2], sep="-")
    table1[j[i]+2,1]<-paste(names(dbtable)[i],levels(dbtable[,i])[3], sep="-")
    
    table1[j[i],2]<-paste(length(dbtable[dbtable[,i]==levels(dbtable[,i])[1] & !is.na(dbtable[,i]),i]),
                          " (", 
                          round(length(dbtable[dbtable[,i]==levels(dbtable[,i])[1] & !is.na(dbtable[,i]),i]) /length(dbtable[!is.na(dbtable[,i]),i])*100,digits=1),
                          ")" )
    
    table1[j[i]+1,2]<-paste(length(dbtable[dbtable[,i]==levels(dbtable[,i])[2] & !is.na(dbtable[,i]),i]),
                            " (", 
                            round(length(dbtable[dbtable[,i]==levels(dbtable[,i])[2] & !is.na(dbtable[,i]),i]) /length(dbtable[!is.na(dbtable[,i]),i])*100,digits=1),
                            ")" )
    table1[j[i]+2,2]<-paste(length(dbtable[dbtable[,i]==levels(dbtable[,i])[3] & !is.na(dbtable[,i]),i]),
                            " (", 
                            round(length(dbtable[dbtable[,i]==levels(dbtable[,i])[3] & !is.na(dbtable[,i]),i]) /length(dbtable[!is.na(dbtable[,i]),i])*100,digits=1),
                            ")" )
    
    form<-as.formula(paste0(m, "~", cat3[i], "+", paste(covs, collapse = "+")))
    lmloop<-lm(form, data=dbhelix)
    
    table1[j[i],3]<-"-"
    table1[j[i]+1,3]<-paste(round(coef(lmloop)[2],digits=2),
                            "(", 
                            round(confint(lmloop, level = 0.95)[2,1],digits=2),
                            ",",
                            round(confint(lmloop, level = 0.95)[2,2],digits=2), ")")
    table1[j[i]+2,3]<-paste(round(coef(lmloop)[3],digits=2),
                            "(", 
                            round(confint(lmloop, level = 0.95)[3,1],digits=2),
                            ",",
                            round(confint(lmloop, level = 0.95)[3,2],digits=2), ")")
    
    table1[j[i]+1,4]<-coef(lmloop)[2]
    table1[j[i]+1,5]<-confint(lmloop, level = 0.95)[2,1]                     
    table1[j[i]+1,6]<-confint(lmloop, level = 0.95)[2,2]
    
    table1[j[i]+2,4]<-coef(lmloop)[3]
    table1[j[i]+2,5]<-confint(lmloop, level = 0.95)[3,1]                     
    table1[j[i]+2,6]<-confint(lmloop, level = 0.95)[3,2]
    
    table1[j[i],7]<-"-"
    table1[j[i]+1,7]<-signif(summary(lmloop)$coefficients[2,4],digits=2)
    table1[j[i]+2,7]<-signif(summary(lmloop)$coefficients[3,4],digits=2)
    
  }
  dbtable<-dbhelix[which(!is.na(dbhelix[m])),contvar]
  
  
  i=1
  j = seq(((length(cat2)*2)+(length(cat3))*3)+1, (((length(cat2)*4)+(length(cat3))*3)+(length(contvar))), by=1)
  
  for(i in 1:length(contvar))
  {
    table1[j[i],1]<-contvar[i]
    table1[j[i],2]<-paste(round(mean(dbtable[,i], na.rm = T), digits = 2),
                          " (", 
                          round(sd(dbtable[,i], na.rm = T),digits = 2),
                          ")" )
    
    form<-as.formula(paste0(m, "~", contvar[i], "+", paste(covs, collapse = "+")))
    lmloop<-lm(form, data=dbhelix)
    
    table1[j[i],3]<-paste(round(coef(lmloop)[2],digits=3),
                          "(", 
                          round(confint(lmloop, level = 0.95)[2,1],digits=3),
                          ",",
                          round(confint(lmloop, level = 0.95)[2,2],digits=3), ")")
    
    table1[j[i],4]<-coef(lmloop)[2]
    table1[j[i],5]<-confint(lmloop, level = 0.95)[2,1]
    table1[j[i],6]<-confint(lmloop, level = 0.95)[2,2]
    
    table1[j[i],7]<- signif(summary(lmloop)$coefficients[2,4],digits=2)
    
  }
  
  #
  j= (length(cat2)*2)+(length(cat3)*3)+ length(contvar)
  
  table1[j+1,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[1], sep="-")
  table1[j+2,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[2], sep="-")
  table1[j+3,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[3], sep="-")
  table1[j+4,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[4], sep="-")
  table1[j+5,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[5], sep="-")
  table1[j+6,1]<-paste("h_cohort",levels(dbhelix$h_cohort)[6], sep="-")
  
  table1[j+1,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[1])),  " (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[1])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1), ")" )
  table1[j+2,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[2])), " (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[2])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1), ")" )
  table1[j+3,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[3]))," (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[3])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1),  ")" )
  table1[j+4,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[4])), " (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[4])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1),  ")" )
  table1[j+5,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[5])), " (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[5])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1), ")" )
  table1[j+6,2]<-paste(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[6])), " (", 
                       round(length(which(dbhelix$h_cohort[which(!is.na(dbhelix[m]))]==levels(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])[6])) /length(dbhelix$h_cohort[which(!is.na(dbhelix[m]))])*100,digits=1), ")" )
  
  form<-as.formula(paste0(m, "~",  "h_cohort +", paste(covs, collapse = "+")))
  lmloop<-lm(form, data=dbhelix)
  
  table1[j+1,3]<-"-"
  table1[j+2,3]<-paste(round(coef(lmloop)[2],digits=2),"(", round(confint(lmloop, level = 0.95)[2,1],digits=2), ",",  round(confint(lmloop, level = 0.95)[2,2],digits=2), ")")
  table1[j+3,3]<-paste(round(coef(lmloop)[3],digits=2),"(", round(confint(lmloop, level = 0.95)[3,1],digits=2), ",",  round(confint(lmloop, level = 0.95)[3,2],digits=2), ")")
  table1[j+4,3]<-paste(round(coef(lmloop)[4],digits=2),"(", round(confint(lmloop, level = 0.95)[4,1],digits=2), ",",  round(confint(lmloop, level = 0.95)[4,2],digits=2), ")")
  table1[j+5,3]<-paste(round(coef(lmloop)[5],digits=2),"(", round(confint(lmloop, level = 0.95)[5,1],digits=2), ",",  round(confint(lmloop, level = 0.95)[5,2],digits=2), ")")
  table1[j+6,3]<-paste(round(coef(lmloop)[6],digits=2),"(", round(confint(lmloop, level = 0.95)[6,1],digits=2), ",",  round(confint(lmloop, level = 0.95)[6,2],digits=2), ")")
  
  table1[j+1,7]<-"-"
  table1[j+2,7]<-signif(summary(lmloop)$coefficients[2,4],digits=2)
  table1[j+3,7]<-signif(summary(lmloop)$coefficients[3,4],digits=2)
  table1[j+4,7]<-signif(summary(lmloop)$coefficients[4,4],digits=2)
  table1[j+5,7]<-signif(summary(lmloop)$coefficients[5,4],digits=2)
  table1[j+6,7]<-signif(summary(lmloop)$coefficients[6,4],digits=2)
  
  #
  
  table1<-as.data.frame(table1)
  names(table1)<-c("Varible", "N(%) or Mean(SD)", paste(m,c("coef(95%CI)","coef", "LB", "UB", "pval"), sep="_"))
  
    exposures_results_mod0[[m]]<-table1
}

###write tables 1 and 2#####

exporttable<-cbind(exposures_results_mod0[[ exposures[1]]],exposures_results_mod0[[ exposures[2] ]],
            exposures_results_mod0[[ exposures[3]]],exposures_results_mod0[[ exposures[4] ]])
write.csv(exporttable, file= "BAAdeterminats.csv", row.names = F)
remove(exporttable)

##developmental outcomes analysis########
development_results = list()
development_figure1 = list()
development_figure2 = list()
development_figure3 = list()
development_figure4 = list()


for(m in exposures){
  
  covs1<-c("h_cohort", "h_sex","h_ethnicity_3catc", "hs_age_years" )
  covs2<-c("h_cohort", "h_sex","h_ethnicity_3catc", "hs_age_years","NK","Bcell","CD4T","CD8T","Mono","Eos", "Neu"  )
  covs3<-c("h_cohort", "h_sex","h_ethnicity_3catc","hs_age_years" ,"FAS_cat","SCS.cat" , "bw_kg","hs_globalexp2","hs_globalsmok_m_None"  )
  covs4<-c("h_cohort", "h_sex","h_ethnicity_3catc","hs_age_years" ,"NK","Bcell","CD4T","CD8T","Mono","Eos", "Neu","FAS_cat","SCS.cat" , "bw_kg","hs_globalexp2","hs_globalsmok_m_None" )
  
  table2<-matrix(NA,length(outcome1), 13)
  table2[,1]<-outcome1
  
  fig2atable<-matrix(NA,length(outcome1), 5)
  fig2atable[,1]<-outcome1
  
  fig2btable<-matrix(NA,length(outcome1), 5)
  fig2btable[,1]<-outcome1
  
  fig2ctable<-matrix(NA,length(outcome1), 5)
  fig2ctable[,1]<-outcome1
  
  fig2dtable<-matrix(NA,length(outcome1), 5)
  fig2dtable[,1]<-outcome1
  
  
  for (i in 1:length(outcome1)){
    form<-as.formula(paste0(outcome1[i], "~", m, "+", paste(covs1, collapse = "+")))
    model<-lm(form, data=dbhelix)
    
    table2[i,2]<-nobs(model)
    table2[i,3]<-paste0(round(coef(model)[2],digits=2),
                        " (", 
                        round(confint(model, level = 0.95)[2,1],digits=2),
                        ", ",
                        round(confint(model, level = 0.95)[2,2],digits=2), ")")
    table2[i,4]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig2atable[i,2]<-round(coef(model)[2],digits=2)
    fig2atable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig2atable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig2atable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome1[i], "~", m, "+", paste(covs2, collapse = "+")))
    model<-lm(form, data=dbhelix)
    
    table2[i,5]<-nobs(model)
    table2[i,6]<-paste0(round(coef(model)[2],digits=2),
                        " (", 
                        round(confint(model, level = 0.95)[2,1],digits=2),
                        ", ",
                        round(confint(model, level = 0.95)[2,2],digits=2), ")")
    table2[i,7]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig2btable[i,2]<-round(coef(model)[2],digits=2)
    fig2btable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig2btable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig2btable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome1[i], "~", m, "+", paste(covs3, collapse = "+")))
    model<-lm(form, data=dbhelix)
    
    table2[i,8]<-nobs(model)
    table2[i,9]<-paste0(round(coef(model)[2],digits=2),
                        " (", 
                        round(confint(model, level = 0.95)[2,1],digits=2),
                        ", ",
                        round(confint(model, level = 0.95)[2,2],digits=2), ")")
    table2[i,10]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig2ctable[i,2]<-round(coef(model)[2],digits=2)
    fig2ctable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig2ctable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig2ctable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome1[i], "~", m, "+", paste(covs4, collapse = "+")))
    model<-lm(form, data=dbhelix)
    
    table2[i,11]<-nobs(model)
    table2[i,12]<-paste0(round(coef(model)[2],digits=2),
                         " (", 
                         round(confint(model, level = 0.95)[2,1],digits=2),
                         ", ",
                         round(confint(model, level = 0.95)[2,2],digits=2), ")")
    table2[i,13]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig2dtable[i,2]<-round(coef(model)[2],digits=2)
    fig2dtable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig2dtable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig2dtable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
  }
  
  #PD###
  table4<-matrix(NA,length(outcome3), 13)
  
  
  fig4atable<-matrix(NA,length(outcome3), 5)
  fig4atable[,1]<-paste("logOdds",outcome3, sep="_")
  
  fig4btable<-matrix(NA,length(outcome3), 5)
  fig4btable[,1]<-paste("logOdds",outcome3, sep="_")
  
  fig4ctable<-matrix(NA,length(outcome3), 5)
  fig4ctable[,1]<-paste("logOdds",outcome3, sep="_")
  
  fig4dtable<-matrix(NA,length(outcome3), 5)
  fig4dtable[,1]<-paste("logOdds",outcome3, sep="_")
  
  
  table4[,1]<-outcome3
  for (i in 1:length(outcome3)){
    form<-as.formula(paste0(outcome3[i], "~", m, "+", paste(covs1, collapse = "+")))
    model<-glm(form, data=dbhelix, family = "binomial")
    
    table4[i,2]<-nobs(model)
    table4[i,3]<-paste0(round(exp(coef(model)[2]),digits=2),
                        " (", 
                        round(exp(confint(model, level = 0.95)[2,1]),digits=2),
                        ", ",
                        round(exp(confint(model, level = 0.95)[2,2]),digits=2), ")")
    table4[i,4]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig4atable[i,2]<-round(coef(model)[2],digits=2)
    fig4atable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig4atable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig4atable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome3[i], "~", m, "+", paste(covs2, collapse = "+")))
    model<-glm(form, data=dbhelix, family = "binomial")
    
    table4[i,5]<-nobs(model)
    table4[i,6]<-paste0(round(exp(coef(model)[2]),digits=2),
                        " (", 
                        round(exp(confint(model, level = 0.95)[2,1]),digits=2),
                        ", ",
                        round(exp(confint(model, level = 0.95)[2,2]),digits=2), ")")
    table4[i,7]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig4btable[i,2]<-round(coef(model)[2],digits=2)
    fig4btable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig4btable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig4btable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome3[i], "~", m, "+", paste(covs3, collapse = "+")))
    model<-glm(form, data=dbhelix, family = "binomial")
    
    table4[i,8]<-nobs(model)
    table4[i,9]<-paste0(round(exp(coef(model)[2]),digits=2),
                        " (", 
                        round(exp(confint(model, level = 0.95)[2,1]),digits=2),
                        ", ",
                        round(exp(confint(model, level = 0.95)[2,2]),digits=2), ")")
    
    table4[i,10]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig4ctable[i,2]<-round(coef(model)[2],digits=2)
    fig4ctable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig4ctable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig4ctable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    form<-as.formula(paste0(outcome3[i], "~", m, "+", paste(covs4, collapse = "+")))
    model<-glm(form, data=dbhelix, family = "binomial")
    
    table4[i,11]<-nobs(model)
    table4[i,12]<-paste0(round(exp(coef(model)[2]),digits=2),
                         " (", 
                         round(exp(confint(model, level = 0.95)[2,1]),digits=2),
                         ", ",
                         round(exp(confint(model, level = 0.95)[2,2]),digits=2), ")")
    table4[i,13]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
    fig4dtable[i,2]<-round(coef(model)[2],digits=2)
    fig4dtable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig4dtable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig4dtable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
  }
  
  fulltable<-rbind(table2,  table4)
  fulltable<-as.data.frame(fulltable)
  names(fulltable)<-c("variable"  ,"M1_N", "M1Coef", "M1pval","M2_N","M2Coef", "M2pval", "M3_N","M3Coef", "M3pval","M4_N", "M4Coef", "M4pval")
  
  figtable1<-rbind(fig2atable, fig4atable)
  figtable1<-as.data.frame(figtable1)
  
  figtable2<-rbind(fig2btable, fig4btable)
  figtable2<-as.data.frame(figtable2)
  
  figtable3<-rbind(fig2ctable, fig4ctable)
  figtable3<-as.data.frame(figtable3)
  
  figtable4<-rbind(fig2dtable, fig4dtable)
  figtable4<-as.data.frame(figtable4)
  
  development_results[[m]]<-fulltable
  development_figure1 [[m]]<-figtable1
  development_figure2 [[m]]<-figtable2
  development_figure3 [[m]]<-figtable3
  development_figure4 [[m]]<-figtable4
  
}


###write table S5#####

exporttable<-rbind(development_results[[ exposures[1]]],development_results[[ exposures[2] ]],
            development_results[[ exposures[3]]],development_results[[ exposures[4] ]])
write.csv(exporttable, file= "BAAdevelopmentresultsv2.csv", row.names = F)
remove(exporttable)

##calculate FDR threshold######
pvals<-read.csv("allpvals_v2.csv", header = F)#import list of all p values extracted from table 2 and figure 2
pvals<-pvals$V1
qvals<-p.adjust(pvals, method = "BH")
fdrcorr<-cbind(pvals, qvals)
max(pvals[which(qvals<0.05)])

##plot figures 2 and S5- S8####
Titles=c("Telomere Length","DNA Methylation Age", "Transcriptome Age", "Immunometabolic Age")
xlabels=c("SD increase per SD shortening of telomere length", "SD increase per year increase in delta age",
          "SD increase per year increase in delta age", "SD increase per year increase in delta age")

incl<-1:nrow(plottable)

developmentnames<-c("Height z-score", "BMI z-score", "Adiposity (BIA fat-mass %)",
                    "Working memory (3-back d')", "Inattentiveness (ANT-HRT)", "Fluid Intelligence (CPM)",
                    "Internalizing Behaviours (CBCL)", "Externalizing behaviours (CBCL)",
                    "Lung Function (FEV1)", "Puberty Onset (Log Odds of PDS >1)") 

pvalsTL<-c("0.89","0.00082*","0.0093" ,"0.37" , "0.78" ,"0.21" , "0.33", "0.032", "0.75","0.36")
pvalsDNA<-c("6.2e-06*", "7.7e-05*" ,"0.00042*", "0.19"  ,  "0.03"  ,  "0.21",    "0.094" ,  "0.01"  ,  "0.085" ,  "0.058")
pvalsRNA<-c( "0.014", "0.005*", "0.079", "0.84",  "0.11" , "0.67"  ,"0.13" ,"0.083" ,"0.08" , "0.42"  )
pvalsIM<-c( "4.3e-11*" ,"3.8e-19*", "5e-06*", "0.0036*",  "5e-04*", "0.08" ,  "0.053"  , "0.82"  ,  "0.16"   , "0.046"    )
#plottable[incl,5]

i=4 #select exposure to plot
exposure=  exposures[i] 
title=Titles[i]
xlabel=xlabels[i]
#plottable<-development_figure[[exposure]] #change depending on adjustment set
plottable<-development_figure1[[exposure]]



forestplot(cbind(developmentnames, paste0("p=", plottable[incl,5])),#pvalsTL#to use stared p values instead
           mean = cbind(as.numeric(plottable[incl,2])),
           lower = cbind(as.numeric(plottable[incl,3])),
           upper = cbind(as.numeric(plottable[incl,4])),
           txt_gp =fpTxtGp(cex=1.3, ticks = gpar(cex=0.8), xlab  = gpar(cex=1)),
           boxsize = 0.25,line.margin = .2,
           xlab=xlabel, hrzl_lines =T,title= title)


##Stratification by sex sensitivity analysis ###############

development_figureBoy = list()
db.s=dbhelix[dbhelix$h_sex==1,]

#development_figureGirl = list()
#db.s=dbhelix[dbhelix$h_sex==2,]


covs.sens<-c("h_cohort", "h_ethnicity_3catc", "hs_age_years" )

for(m in exposures){
  
  fig2atable<-matrix(NA,length(outcome1), 5)
  fig2atable[,1]<-outcome1
  
  for (i in 1:length(outcome1)){
    form<-as.formula(paste0(outcome1[i], "~", m, "+", paste(covs.sens, collapse = "+")))
    model<-lm(form, data=db.s)
    
    
    fig2atable[i,2]<-round(coef(model)[2],digits=2)
    fig2atable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig2atable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig2atable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
  }
  
  #PD##
  
  fig4atable<-matrix(NA,length(outcome3), 5)
  fig4atable[,1]<-paste("logOdds",outcome3, sep="_")
  
  table4[,1]<-outcome3
  for (i in 1:length(outcome3)){
    form<-as.formula(paste0(outcome3[i], "~", m, "+", paste(covs.sens, collapse = "+")))
    model<-glm(form, data=db.s, family = "binomial")
    
    fig4atable[i,2]<-round(coef(model)[2],digits=2)
    fig4atable[i,3]<-round(confint(model, level = 0.95)[2,1],digits=2)
    fig4atable[i,4]<-round(confint(model, level = 0.95)[2,2],digits=2)
    fig4atable[i,5]<-signif(summary(model)$coefficients[2,4], digits = 2)
    
  }
  
  
  figtable1<-rbind(fig2atable, fig4atable)
  figtable1<-as.data.frame(figtable1)
  
  development_figureBoy [[m]]<-figtable1
  #development_figureGirl [[m]]<-figtable1
  
}


##plot figure S4###########
i=4
exposure=  exposures[i] 
title=Titles[i]
xlabel=xlabels[i]
plottableB<-development_figureBoy[[exposure]]
plottableG<-development_figureGirl[[exposure]]


forestplot(cbind(developmentnames, paste0("p=", plottableB[incl,5]), paste0("p=", plottableG[incl,5])),#pvalsTL
           mean = cbind(as.numeric(plottableB[incl,2]),as.numeric(plottableG[incl,2])),
           lower = cbind(as.numeric(plottableB[incl,3]),as.numeric(plottableG[incl,3])),
           upper = cbind(as.numeric(plottableB[incl,4]),as.numeric(plottableG[incl,4])),
           txt_gp =fpTxtGp(cex=1.3, ticks = gpar(cex=0.8), xlab  = gpar(cex=1), legend= gpar(cex=1.3)),
           boxsize = 0.25,line.margin = .2,col=fpColors(box=c("blue", "red")),  legend = c("Boys", "Girls"),
           fn.ci_norm = c(fpDrawNormalCI, fpDrawCircleCI),
           xlab=xlabel, hrzl_lines =T,title= title)

###generate deidentified fig 1 and 2 gernartion dataset for eLIFE
db.s=dbhelix[which(!is.na(dbhelix$TL) | !is.na(dbhelix$skinHorvath) | !is.na(dbhelix$mRNAage) | !is.na(dbhelix$predage.unadj)),c("TL", "skinHorvath","mRNAage","predage.unadj", "h_cohort","hs_age_years")]
write.csv(db.s, file= "C:/Users/Oliver Robinson/OneDrive - Imperial College London/Metage groupshare/Children/Analysis/HELIX/Elife supplmentary dataset.csv", row.names = F)
